#Input--a formatted LexisNexis file
#Output--files containing a specified number of
#stories between MIN and MAX below
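#invoke--perl break.pl input_prefix 5000 output_prefix
#reads input_prefix.NoDupStories and writes output_prefix.N files, where N is the
#number of stories seen when that file was started (0 for the first file);
#any folder named in output_prefix must already exist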

##
##  Programming supported by National Science Foundation Political Science Program Grant
##  SES-0719634 "Improving the Efficiency of Militarized Interstate Dispute Data Collection using
##  Automated Textual Analysis" and SES-0924240, "MID4: Updating the Militarized Dispute Data Set
##  2002-2010."

#!/usr/local/bin/perl

# ======== globals =========== #
use strict;
use warnings;

my $stories = 0;      # total number of story separators seen so far
my $MIN     = 5000;   # smallest accepted stories-per-output-file value
my $MAX     = 10000;  # largest accepted stories-per-output-file value

# Any input line containing this run of dashes marks a story boundary.
my $SEPARATOR = '---------------------------------------------------------------';

# Open $path for writing (truncating it) and return the lexical filehandle.
# Dies with the OS error if the file cannot be created.
sub openOutput {
    my ($path) = @_;
    # Three-arg open: the path comes from the command line and must never be
    # interpreted as an open mode or a pipe.
    open(my $fh, '>', $path) or die "Can't open output file $path; error $!";
    return $fh;
}

# ======== main program =========== #
#
# Arguments:
#   $ARGV[0]  input file prefix; the file read is "<prefix>.NoDupStories"
#   $ARGV[1]  number of stories per output file, between $MIN and $MAX
#   $ARGV[2]  output file prefix; files are named "<prefix>.<story number>"
#             (an omitted prefix is treated as the empty string)
#
# Exit status is 1 on a usage error and 0 on success.

my $inputPrefix = $ARGV[0];  # get input file prefix
if (!defined($inputPrefix) || length($inputPrefix) == 0) {
    print "\aPlease enter input file and number of stories the file to be broken into! -- please re-run program\n";
    exit 1;
}
my $inputFile = $inputPrefix . ".NoDupStories"; # file containing no dup stories

# Normalise the requested count to an integer once, so the range check and
# the split loop agree (e.g. "10000.5" must not yield 10001 stories per file).
my $mn = defined($ARGV[1]) ? $ARGV[1] : 0;
{
    no warnings 'numeric';   # non-numeric input becomes 0 and is rejected below
    $mn = int($mn);
}

if (($mn < $MIN) || ($mn > $MAX)) {
    print "\aPlease enter between $MIN and $MAX stories the file to be broken into! -- please re-run program\n";
    exit 1;
}

my $outputPrefix = defined($ARGV[2]) ? $ARGV[2] : '';

# open input and the first output file
open(my $fin, '<', $inputFile) or die "Can't open input files $inputFile; error $!";

my $outfile = $outputPrefix . "." . $stories;   # first output file
my $fout    = openOutput($outfile);
print {$fout} "\n";

my $count = $stories;   # separators seen since the current output file was started

# NOTE(review): the first input line is consumed here and never written to any
# output file -- presumably a header or leading separator; confirm with the
# producer of the .NoDupStories files.
my $line = <$fin>;

# Stream the remaining lines one at a time instead of loading the whole file.
while (defined($line = <$fin>)) {

    if (index($line, $SEPARATOR) >= 0) {
        $stories++;
        $count++;
    }

    if ($count < $mn) {
        print {$fout} $line;   # still below the per-file limit
    } else {
        # Limit reached on this separator line: finish the current file and
        # start the next one, named after the running story total.
        close($fout) or die "Can't close output file $outfile; error $!";
        $count   = 0;
        $outfile = $outputPrefix . "." . $stories;
        $fout    = openOutput($outfile);
        # The separator that triggered the cut is not copied to the old file,
        # so it is written as the first separator of the new one.
        print {$fout} "\n$SEPARATOR\n";
        print "\n==========================\nProcessing $outfile, story number $stories\n";
    }

} # while: input lines

close($fin);
# Closing the last output file explicitly surfaces buffered write errors.
close($fout) or die "Can't close output file $outfile; error $!";

print "\n\aFinished: $stories stories processed in total.\n";
exit;
